{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>orderID</th>\n",
       "      <th>product</th>\n",
       "      <th>category</th>\n",
       "      <th>userID</th>\n",
       "      <th>monetary</th>\n",
       "      <th>date</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>O1</td>\n",
       "      <td>木桌</td>\n",
       "      <td>家具</td>\n",
       "      <td>A</td>\n",
       "      <td>400</td>\n",
       "      <td>2021-01-01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>O2</td>\n",
       "      <td>薯片</td>\n",
       "      <td>食品</td>\n",
       "      <td>B</td>\n",
       "      <td>10</td>\n",
       "      <td>2021-01-01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>O3</td>\n",
       "      <td>椅子</td>\n",
       "      <td>家具</td>\n",
       "      <td>A</td>\n",
       "      <td>100</td>\n",
       "      <td>2021-01-02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>O4</td>\n",
       "      <td>毛衣</td>\n",
       "      <td>服装</td>\n",
       "      <td>C</td>\n",
       "      <td>200</td>\n",
       "      <td>2021-01-02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>O5</td>\n",
       "      <td>面包</td>\n",
       "      <td>食品</td>\n",
       "      <td>A</td>\n",
       "      <td>25</td>\n",
       "      <td>2021-01-03</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>O6</td>\n",
       "      <td>牛仔裤</td>\n",
       "      <td>服装</td>\n",
       "      <td>B</td>\n",
       "      <td>150</td>\n",
       "      <td>2021-01-03</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>O7</td>\n",
       "      <td>饮用水</td>\n",
       "      <td>食品</td>\n",
       "      <td>D</td>\n",
       "      <td>60</td>\n",
       "      <td>2021-01-04</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  orderID product category userID  monetary       date\n",
       "0      O1      木桌       家具      A       400 2021-01-01\n",
       "1      O2      薯片       食品      B        10 2021-01-01\n",
       "2      O3      椅子       家具      A       100 2021-01-02\n",
       "3      O4      毛衣       服装      C       200 2021-01-02\n",
       "4      O5      面包       食品      A        25 2021-01-03\n",
       "5      O6     牛仔裤       服装      B       150 2021-01-03\n",
       "6      O7     饮用水       食品      D        60 2021-01-04"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 读取数据集\n",
    "df1 = pd.read_excel('./示例文件.xlsx',sheet_name='data1')\n",
    "df1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>userID</th>\n",
       "      <th>reg_date</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>A</td>\n",
       "      <td>2021-01-01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>B</td>\n",
       "      <td>2021-01-01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>C</td>\n",
       "      <td>2021-01-02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>D</td>\n",
       "      <td>2021-01-02</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  userID   reg_date\n",
       "0      A 2021-01-01\n",
       "1      B 2021-01-01\n",
       "2      C 2021-01-02\n",
       "3      D 2021-01-02"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df2 = pd.read_excel('./示例文件.xlsx',sheet_name='data2')\n",
    "df2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>userID</th>\n",
       "      <th>monetary</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>A</td>\n",
       "      <td>525</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>B</td>\n",
       "      <td>160</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>C</td>\n",
       "      <td>200</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>D</td>\n",
       "      <td>60</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  userID  monetary\n",
       "0      A       525\n",
       "1      B       160\n",
       "2      C       200\n",
       "3      D        60"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 筛选出各用户消费的总金额\n",
    "df = df1.groupby(['userID'])['monetary'].sum().reset_index()\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>category</th>\n",
       "      <th>orderID</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>家具</td>\n",
       "      <td>O1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>服装</td>\n",
       "      <td>O4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>食品</td>\n",
       "      <td>O7</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  category orderID\n",
       "0       家具      O1\n",
       "3       服装      O4\n",
       "6       食品      O7"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 筛选出各品类消费金额最多的订单\n",
    "df1['组内降序排名'] = df1.groupby(['category'])['monetary'].rank(method='dense',ascending=False).astype(int)\n",
    "df = df1[df1['组内降序排名'] == 1]\n",
    "df = df[['category','orderID']]\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "83.75"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 筛选出老用户（注册日当日未新用户）的平均购买金额\n",
    "df_merge = pd.merge(df1,df2,on='userID',how='left')\n",
    "df_merge['用户分类'] = np.where(df_merge['date'] == df_merge['reg_date'],'新用户','老用户')\n",
    "df_merge = df_merge[df_merge['用户分类'] == '老用户']\n",
    "avg_monetary = df_merge['monetary'].mean()\n",
    "avg_monetary"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>userID</th>\n",
       "      <th>rank</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>A</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  userID  rank\n",
       "0      A     3"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 筛选出连续 3 日有购买记录的用户\n",
    "df = df1[['date','userID']].drop_duplicates()\n",
    "df['rank'] = df['date'].groupby(df['userID']).rank(method='dense',ascending=True).astype(int)\n",
    "df = df.sort_values(by=['userID','date'])\n",
    "df['days'] = df['rank'].map(lambda x:pd.Timedelta(days=x))\n",
    "df['minus'] = df['date'] - df['days']\n",
    "df_group1 = df.groupby(['userID','minus'])['rank'].max().reset_index()\n",
    "df_group2 = df_group1.groupby('userID')['rank'].max().reset_index()\n",
    "df = df_group2[df_group2['rank'] == 3]\n",
    "df"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
